PolyOptimalIRT:

#library(gridExtra)
library(ggpubr)
library(ggplot2)
library(mirt)
library(TestGardener)

load("data/NatMath_fittedmodel_1cyc.RData")
load("data/NatMath_fittedmodel.RData")
load("data/NatMath_fittedmodel_mirt.RData")
# AnalyzeResult <- AnalyzeResult2
cycle=10

optthetas <- AnalyzeResult$parList[[cycle]]$theta
optescores <- testscore(optthetas,
                     AnalyzeResult$parList[[cycle]]$WfdList,
                     NatMath_dataList$optList)
parthetas <- fscores(natMathGpcm,
                   method = "EAP",
                   response.pattern = NULL)[, "F1"]
parescores <- expected.test(natMathGpcm, matrix(parthetas, ncol = 1))


Wbinsmth.plot(AnalyzeResult$parList[[cycle]]$binctr, AnalyzeResult$parList[[cycle]]$Qvec,
              AnalyzeResult$parList[[cycle]]$WfdList,
              NatMath_dataList, Wrng=c(0,7), saveplot = F)
dataSimulation()
Wbinsmth()
Sensitivity.plot()

# från Sensitivity.plot för att räkna ut perc. från theta
test <- eval.surp(c(100), AnalyzeResult$parList[[cycle]]$WfdList[[19]]$Wfd)

sum(1/ncol(test)^test)

AnalyzeResult$parList[[cycle]]$WfdList[[20]]$Wfd$coefs
AnalyzeResult$parList[[cycle]]$WfdList[[20]]$Wfd$basis
AnalyzeResult$parList[[cycle]]$WfdList[[20]]$Wfd$fdnames
AnalyzeResult$parList[[cycle]]$WfdList[[20]]$Wfd

# parToOpt <- sapply(parescores, sumScoreToIRT, AnalyzeResult$parList[[cycle]]$WfdList,
#                    thetamin=0, thetamax=100,
#         NatMath_dataList$optList, verbose=TRUE)
# optToPar <- sapply(optescores, sumScoreToIRT, natMathGpcm, thetamin=-3.5, thetamax=3.5, verbose=TRUE)
#save(optToPar, parToOpt, file = "data/NatMath_transformed.rda")
load(file = "data/NatMath_transformed.rda")

df <- data.frame(parToOpt, optToPar, optthetas, parthetas, optescores, parescores, sumscores=rowSums(U))

# get empirical CDF of fitted indexes
optcdf <- ecdf(optthetas)
parcdf <- ecdf(parthetas)
optperc <- optcdf(seq(0, 100))
parperc <- parcdf(seq(0, 100))
percentiles <- seq(0, 100)/100
percentilesopt <- quantile(optthetas, percentiles, type = 7)
percentilespar <- quantile(parthetas, percentiles, type = 7)
percentilesopt2 <- quantile(optescores, percentiles, type = 7)
plot(percentiles, percentilespar) # approx the same as N(0, 1)-distr. would have
plot(percentiles, percentilesopt) #
plot(percentilesopt, percentilespar) # lots of duplicates of opt gives sample perc.
plot(percentilesopt, percentilesopt2) # non-linear because the scales are different

p1 <- ggplot(df, aes(x=optthetas)) +
    theme_bw() +
    geom_histogram(aes(y=..density..), colour="black", fill="white", bins = 20) +
    geom_density(adjust = 1/2) + # adjust justerar bandwith, halva default
    scale_y_continuous(expand = c(0,0, 0, 0.001)) +
    xlab(bquote("Nonparametric" ~ theta)) + ylab("density")


# Thetas ------------------------------------------------------------------
plot(parToOpt, optthetas)
plot(scaleThetaFromInf(0, 100,parthetas), optthetas)
plot(optescores, optthetas)
plot(parescores, parthetas)


test <- tidyr::pivot_longer(df, cols = c("parToOpt", "optthetas"), names_to = "method", values_to = "theta")
test2 <- tidyr::pivot_longer(df, cols = c("optToPar", "parthetas"), names_to = "method", values_to = "theta")

pdf("plots/NatMath theta plots.pdf")
p1 <- ggplot(df, aes(x=optthetas)) +
    theme_bw() +
    geom_histogram(aes(y=..density..), colour="black", fill="white", bins = 20)+
    geom_density(adjust = 1/2) + # adjust justerar bandwith, halva default
    scale_y_continuous(expand = c(0,0, 0, 0.001))+
    xlab(bquote("Nonparametric" ~ theta)) + ylab("density")
p2 <- ggplot(df, aes(x=parToOpt)) +
    theme_bw() +
    geom_histogram(aes(y=..density..), colour="black", fill="white", bins = 20)+
    geom_density(adjust = 1/2) + # adjust justerar bandwith, halva default
    scale_y_continuous(expand = c(0,0, 0, 0.001))+
    xlab(bquote("Nonparametric" ~ theta)) + ylab("density")
p3 <- ggplot(df, aes(x=parthetas)) +
    theme_bw() +
    geom_histogram(aes(y=..density..), colour="black", fill="white", bins = 20)+
    geom_density(adjust = 1/2) + # adjust justerar bandwith, halva default
    scale_y_continuous(expand = c(0,0, 0, 0.05))+
    xlim(-2.5, 2.5)+
    xlab(bquote("Parametric" ~ theta)) + ylab("density")
p4 <- ggplot(df, aes(x=optToPar)) +
    theme_bw() +
    geom_histogram(aes(y=..density..), colour="black", fill="white", bins = 20)+
    geom_density(adjust = 1/2) + # adjust justerar bandwith, halva default
    scale_y_continuous(expand = c(0,0, 0, 0.05))+
    xlim(-2.5, 2.5)+
    xlab(bquote("Parametric" ~ theta)) + ylab("density")
ggarrange(p1, p4, p2, p3,
          labels = c("Opt.", "Opt.", "GPCM", "GPCM"),
          ncol = 2, nrow = 2)
dev.off()

pdf("plots/NatMath theta plots logistic.pdf")
p1 <- ggplot(df, aes(x=optthetas)) +
    theme_bw() +
    geom_histogram(aes(y=..density..), colour="black", fill="white", bins = 20)+
    geom_density(adjust = 1/2) + # adjust justerar bandwith, halva default
    scale_y_continuous(expand = c(0,0, 0, 0.001))+
    xlab(bquote("Nonparametric" ~ theta)) + ylab("density")
p2 <- ggplot(df, aes(x=scaleThetaFromInf(0, 100, parthetas))) +
    theme_bw() +
    geom_histogram(aes(y=..density..), colour="black", fill="white", bins = 20)+
    geom_density(adjust = 1/2) + # adjust justerar bandwith, halva default
    scale_y_continuous(expand = c(0,0, 0, 0.001))+
    xlab(bquote("Nonparametric" ~ theta)) + ylab("density")
p3 <- ggplot(df, aes(x=parthetas)) +
    theme_bw() +
    geom_histogram(aes(y=..density..), colour="black", fill="white", bins = 20)+
    geom_density(adjust = 1/2) + # adjust justerar bandwith, halva default
    scale_y_continuous(expand = c(0,0, 0, 0.05))+
    xlim(-2.5, 2.5)+
    xlab(bquote("Parametric" ~ theta)) + ylab("density")
p4 <- ggplot(df, aes(x=scaleThetaToInf(0, 100, optthetas))) +
    theme_bw() +
    geom_histogram(aes(y=..density..), colour="black", fill="white", bins = 20)+
    geom_density(adjust = 1/2) + # adjust justerar bandwith, halva default
    scale_y_continuous(expand = c(0,0, 0, 0.05))+
    xlim(-2.5, 2.5)+
    xlab(bquote("Parametric" ~ theta)) + ylab("density")
ggarrange(p1, p4, p2, p3,
          labels = c("Opt.", "Opt.", "GPCM", "GPCM"),
          ncol = 2, nrow = 2)
dev.off()

# ggplot(test, aes(x = theta, fill = method)) +
#     theme_bw() +
#     geom_histogram(aes(y=..density..), position = "identity", alpha = .8, bins = 20) +
#     geom_density(adjust = 1/3, aes(linetype = method, fill=NULL)) + # adjust justerar bandwith, halva default
#     scale_fill_manual(labels = c("GPCM", "Optimal"), values=c("grey10", "grey60")) +
#     scale_linetype_manual(values=c("solid", "dotted"), labels = c("GPCM", "Optimal")) +
#     scale_y_continuous(expand = c(0, 0, 0, 0.001)) +
#     scale_x_continuous(expand = c(0, 0, 0, 0.001)) +
#     xlab("Optimal score index")
#
# ggplot(test2, aes(x = theta, fill = method)) +
#     theme_bw() +
#     geom_histogram(aes(y=..density..), position = "identity", alpha = .8, bins = 25) +
#     geom_density(adjust = 1/2, aes(linetype = method, fill=NULL)) + # adjust justerar bandwith, halva default
#     scale_fill_manual(labels = c("GPCM", "Optimal"), values=c("grey10", "grey60")) +
#     scale_linetype_manual(values=c("solid", "dotted"), labels = c("GPCM", "Optimal")) +
#     scale_y_continuous(expand = c(0, 0, 0, 0.05)) +
#     scale_x_continuous(expand = c(0, 0, 0, 0.001)) +
#     xlab(expression(theta))




plot(parToOpt, optthetas)
plot(scaleThetaFromInf(0, 100,parthetas), optthetas)
plot(optescores, optthetas)
plot(parescores, parthetas)












pdf("plots/NatMath theta plots.pdf")
# densities
d1 <- ggplot(df, aes(x=optthetas)) +
    theme_bw() +
    geom_histogram(aes(y=..density..), colour="black", fill="white", bins = 20)+
    geom_density(adjust = 1/2) + # adjust justerar bandwith, halva default
    scale_y_continuous(expand = c(0,0, 0, 0.001))+
    xlab("Optimal index") + ylab("density")
d2 <- ggplot(df, aes(x=scaleThetaFromInf(0, 100, parthetas))) +
    theme_bw() +
    geom_histogram(aes(y=..density..), colour="black", fill="white", bins = 20)+
    geom_density(adjust = 1/2) + # adjust justerar bandwith, halva default
    scale_y_continuous(expand = c(0,0, 0, 0.001))+
    xlab("Parametric \"index\"") + ylab("density")
d3 <- ggplot(df, aes(x=optescores)) +
    theme_bw() +
    geom_histogram(aes(y=..density..), colour="black", fill="white", bins = 20)+
    geom_density(adjust = 1/2) + # adjust justerar bandwith, halva default
    scale_y_continuous(expand = c(0,0, 0, 0.001))+
    xlab("Optimal expected sum score") + ylab("density")
d4 <- ggplot(df, aes(x=parescores)) +
    theme_bw() +
    geom_histogram(aes(y=..density..), colour="black", fill="white", bins = 20)+
    geom_density(adjust = 1/2) + # adjust justerar bandwith, halva default
    scale_y_continuous(expand = c(0,0, 0, 0.001))+
    xlab("Parametric expected sum score") + ylab("density")
d5 <- ggplot(df, aes(x=sumscores)) +
    theme_bw() +
    geom_histogram(aes(y=..density..), colour="black", fill="white", bins = 20)+
    geom_density(adjust = 1/2) + # adjust justerar bandwith, halva default
    scale_y_continuous(expand = c(0,0, 0, 0.001))+
    xlab("Actual sum scores") + ylab("density")
ggarrange(d1, d2, d3, d4, d5,
          labels = c("A", "B", "C", "D", "E"),
          ncol = 2, nrow = 3)
# score differences
a <- ggplot(df, aes(x=optescores, y=parescores-optescores)) +
    theme_bw() +
    geom_point() +
    ggtitle("Expected scores") + xlab("optimal") + ylab("parametric-optimal")
b <- ggplot(df, aes(x=parescores, y=parescores-optescores)) +
    theme_bw() +
    geom_point() +
    ggtitle("Expected scores") + xlab("parametric") + ylab("parametric-optimal")
c <- ggplot(df, aes(x=optthetas, y=scaleThetaFromInf(0, 100, parthetas)-optthetas)) +
    theme_bw() +
    geom_point() +
    ggtitle("Index scores") + xlab("optimal") + ylab("parametric-optimal")
d <- ggplot(df, aes(x=scaleThetaFromInf(0, 100, parthetas), y=scaleThetaFromInf(0, 100, parthetas)-optthetas)) +
    theme_bw() +
    geom_point() +
    ggtitle("Index scores") + xlab("parametric") + ylab("parametric-optimal")
e <- ggplot(df, aes(x=scaleThetaToInf(0, 100, optthetas),
                    y=parthetas-scaleThetaToInf(0, 100, optthetas))) +
    theme_bw() +
    geom_point() +
    ggtitle("Index scores") + xlab("optimal") + ylab("parametric-optimal")
f <- ggplot(df, aes(x=parthetas,
                    y=parthetas-scaleThetaToInf(0, 100, optthetas))) +
    theme_bw() +
    geom_point() +
    ggtitle("Index scores") + xlab("parametric") + ylab("parametric-optimal")
ggarrange(a, b, c, d, e, f,
          labels = c("A", "B", "C", "D", "E", "F"),
          ncol = 2, nrow = 3)
dev.off()


divisors <- function(n) {
    res <- c()
    # iterate from 3 to sqrt(n)
    for (i in 2:n) {
        if (n %% i == 0) {
            res <- c(res, i)
        }
    }
    return(res)
}
percFromItemCategory <- function(itemInd, U, size, sup=F) {
    cats <- length(unique(U[, itemInd]))
    U <- U[order(rowSums(U),decreasing=F), ]
    resMat <- matrix(data = 0, nrow = nrow(U)/size, ncol = cats)

    i <- 1
    resRow <- 1
    while (i < nrow(U)) {
        temp <- U[i:min(nrow(U), (i+size-1)), itemInd]
        for (j in 0:(cats-1)) {
            resMat[resRow, j+1] <- sum(temp==j)/length(temp)
        }
        resRow <- resRow+1
        i=i+size
    }
    if (sup) return(-log(resMat, cats)) else return(resMat)
}
percFromItemCategory2 <- function(item, percntrnk, U, nbin, sup=F) {
    cats <- length(unique(U[, item]))
    thetaQnt = seq(0, 100, len = 2 * nbin + 1)
    bdry <- thetaQnt[seq(1, 2 * nbin + 1, by = 2)]
    aves <- rep(0, nbin)
    for (k in 1:nbin) {
        aves[k] <- (bdry[k] + bdry[k + 1])/2
    }

    logMi <- log(cats)
    Uveci <- as.numeric(U[, item])
    Pbin <- matrix(0, nbin, cats)
    Wbin <- matrix(0, nbin, cats)
    indpts <- c(1:nbin)
    for (k in 1:nbin) {
        indk <- percntrnk >= bdry[k] & percntrnk <= bdry[k + 1]
        if (sum(indk) > 0) {
            Uvecik <- Uveci[indk]
            nk <- sum(indk)
            for (m in 0:(cats-1)) {
                Pbin[k, m+1] <- sum(Uvecik == m)/nk
            }
            Wbin[k, ] <- -log(Pbin[k, ])/logMi
        }
        else {
            Pbin[k, ] <- NA
        }
    }
    return(list(Wbin, Pbin))
}


# ICCs --------------------------------------------------------------------
percentiles <- seq(0, 100)/100
cycle <- 1
percentilesopt <- quantile(optthetas, percentiles, type = 7)
percentilespar <- quantile(parthetas, percentiles, type = 7)
# bins <- split(sort(rowSums(U)),  # split sum scores into 15 ordered bins
#               cut(seq_along(sort(rowSums(U))), 15, labels = FALSE))
# binval <- sapply(bins, mean)
# binx <- seq(100/length(bins)/2, 100, 100/length(bins))

pdf("plots/NatMath perc ICC plots 1cyc 2.pdf")
for (i in 1:32) {
    binval <- percFromItemCategory(i, U, 2235/22)
    binvalsup <- percFromItemCategory(i, U, 2235/22, sup=T)
    binval <- percFromItemCategory2(i, NatMath_dataList$percntrnk, U, 22)
    # binx <- seq(100/nrow(binval)/2, 100, 100/nrow(binval))
    binx <- seq(100/nrow(binval[[1]])/2, 100, 100/nrow(binval[[1]]))
    # mirtTheta <- sapply(optescores0to100, sumScoreToIRT, natMathGpcm, thetamin=-3.5, thetamax=3.5, verbose=TRUE)
    icc1 <- comparePercICCs(item = i, mirtModel = natMathGpcm, WfdList = AnalyzeResult$parList[[cycle]]$WfdList,
                        mirtThetas = percentilespar,
                        bincenters = binx, binvalues = binval[[2]],
                        optThetas = percentilesopt, legend = T, grayscale = T)
    icc2 <- comparePercICCs(item = i, mirtModel = natMathGpcm, WfdList = AnalyzeResult$parList[[cycle]]$WfdList,
                        mirtThetas = percentilespar, grayscale = T,
                        bincenters = binx, binvalues = binval[[1]],
                        optThetas = percentilesopt, legend = T, surprisal = T)
    print(ggarrange(icc1, icc2, ncol = 2, nrow = 1, common.legend = T))
}
dev.off()

#
# pdf("plots/NatMath ICC plots.pdf")
# for (i in 1:32) {
#     item <- i
#     opt <- seq(0, 100, 1)
#     optescores0to100 <- testscore(opt,
#                                AnalyzeResult$parList[[cycle]]$WfdList,
#                                NatMath_dataList$optList)
#     mirtTheta <- sapply(optescores0to100, sumScoreToIRT, natMathGpcm, thetamin=-3.5, thetamax=3.5, verbose=TRUE)
#     icc1 <- compareICCs(item, natMathGpcm, AnalyzeResult$parList[[10]]$WfdList,
#                         thetasToShow = opt,
#                         mirtThetas = mirtTheta, optThetas = opt, legend = T)
#     icc2 <- compareICCs(item, natMathGpcm, AnalyzeResult$parList[[10]]$WfdList,
#                         thetasToShow = opt, surprisal = T,
#                         mirtThetas = mirtTheta, optThetas = opt, legend = T)
#     icc3 <- compareICCs(item, natMathGpcm, AnalyzeResult$parList[[10]]$WfdList,
#                         thetasToShow = mirtTheta,
#                         mirtThetas = mirtTheta, optThetas = opt, legend = T)
#     icc4 <- compareICCs(item, natMathGpcm, AnalyzeResult$parList[[10]]$WfdList,
#                         thetasToShow = mirtTheta, surprisal = T,
#                         mirtThetas = mirtTheta, optThetas = opt, legend = T)
#     print(ggarrange(icc1, icc2, icc3, icc4, ncol = 2, nrow = 2, common.legend = T))
# }
# dev.off()
#
# pdf("plots/Expected trans. NatMath ICC plots.pdf")
# item <- 19
# opt <- seq(0, 100, 1)
# optescores0to100 <- testscore(opt,
#                            AnalyzeResult$parList[[cycle]]$WfdList,
#                            NatMath_dataList$optList)
# mirtTheta <- sapply(optescores0to100, sumScoreToIRT, natMathGpcm, thetamin=-3.5, thetamax=3.5, verbose=TRUE)
# icc1 <- compareICCs(item, natMathGpcm, AnalyzeResult$parList[[10]]$WfdList,
#                     thetasToShow = opt,
#                     mirtThetas = mirtTheta, optThetas = opt, legend = T)
# icc2 <- compareICCs(item, natMathGpcm, AnalyzeResult$parList[[10]]$WfdList,
#                     thetasToShow = opt, surprisal = T,
#                     mirtThetas = mirtTheta, optThetas = opt, legend = T)
# icc3 <- compareICCs(item, natMathGpcm, AnalyzeResult$parList[[10]]$WfdList,
#                     thetasToShow = mirtTheta,
#                     mirtThetas = mirtTheta, optThetas = opt, legend = T)
# icc4 <- compareICCs(item, natMathGpcm, AnalyzeResult$parList[[10]]$WfdList,
#                     thetasToShow = mirtTheta, surprisal = T,
#                     mirtThetas = mirtTheta, optThetas = opt, legend = T)
# print(ggarrange(icc1, icc2, icc3, icc4, ncol = 2, nrow = 2, common.legend = T))
# dev.off()

# exponential transformation
# bincent1 <- AnalyzeResult$parList[[1]]$binctr
# bincent2 <- AnalyzeResult$parList[[10]]$binctr
# sbinval1 <- AnalyzeResult$parList[[1]]$WfdList[[item]]$Wbin
# sbinval2 <- AnalyzeResult$parList[[10]]$WfdList[[item]]$Wbin
# pbinval1 <- AnalyzeResult$parList[[1]]$WfdList[[item]]$Pbin
pdf("plots/Logistic trans. NatMath ICC plots.pdf")
icc1 <- plotICCcombo(item, natMathGpcm, AnalyzeResult$parList[[10]]$WfdList,
                     NatMath_dataList$scrfine/60*100, grayscale = F)
icc2 <- plotICCcombo(item, natMathGpcm, AnalyzeResult$parList[[10]]$WfdList,
                     NatMath_dataList$scrfine/60*100, surprisal = T)
icc3 <- plotICCcombo(item, natMathGpcm, AnalyzeResult$parList[[10]]$WfdList,
                     NatMath_dataList$scrfine/60*100, infscale = T)
icc4 <- plotICCcombo(item, natMathGpcm, AnalyzeResult$parList[[10]]$WfdList,
                     NatMath_dataList$scrfine/60*100, surprisal = T, infscale = T)
ggarrange(icc1, icc2, icc3, icc4, ncol = 2, nrow = 2, common.legend = T)
dev.off()
joakimwallmark/PolyOptimalIRT documentation built on Dec. 21, 2021, 1:16 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
joakimwallmark/PolyOptimalIRT

scripts/Empirical example NatMath/NatMathPlots-old.R
In joakimwallmark/PolyOptimalIRT:

R Package Documentation

Browse R Packages

We want your feedback!

joakimwallmark/PolyOptimalIRT

scripts/Empirical example NatMath/NatMathPlots-old.R In joakimwallmark/PolyOptimalIRT:

R Package Documentation

Browse R Packages

We want your feedback!

scripts/Empirical example NatMath/NatMathPlots-old.R
In joakimwallmark/PolyOptimalIRT: